import re  
import requests  
  
# Crawl configuration for the target Tieba thread.
num = 1          # first page to fetch (1-based)
max_pages = 6    # last page to fetch (inclusive)
# Thread URL; the page number is appended to the trailing `pn=` query parameter.
base_url = 'https://tieba.baidu.com/p/9087199359?pn='
# Browser-like User-Agent, presumably to avoid the server rejecting scripted requests.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
 
with open("tieba_data.txt", "a", encoding="utf-8") as f:
    # Compile the extraction patterns once, outside the page loop.
    poster_re = re.compile(r'<a\s+.*?class="p_author_name j_user_card".*?>(.*?)</a>', re.S)
    time_re = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}')
    content_re = re.compile(r'<div.*?class="d_post_content j_d_post_content ".*?>(.*?)<br>', re.S)

    for page in range(num, max_pages + 1):
        url = base_url + str(page)
        # timeout: without it a stalled server would hang this script forever.
        response = requests.get(url, headers=headers, timeout=10)
        # Fail loudly on HTTP errors instead of silently scraping an error page.
        response.raise_for_status()
        # Tieba serves UTF-8, but requests may guess ISO-8859-1 from the headers
        # and garble the Chinese text before it is written back out as UTF-8.
        response.encoding = "utf-8"
        html = response.text

        # Extract poster names, post timestamps, and post bodies.
        posters = poster_re.findall(html)
        times = time_re.findall(html)
        contents = content_re.findall(html)

        # NOTE(review): zip() truncates to the shortest list, and the timestamp
        # regex matches ANY date-time on the page, so records can become
        # misaligned if the page contains extra timestamps — verify on real data.
        for poster, post_time, body in zip(posters, times, contents):
            f.write(f"发帖人: {poster}, 发帖时间: {post_time},内容:{body}\n")
# No explicit f.close(): the `with` statement already closed the file.
# (The original called close() on an already-closed handle, which was redundant.)

